#include <algorithm>
#include <cassert>
#include <cstddef>
#include <functional>
#include <iostream>
#include <thread>
#include <utility>
#include <vector>
using namespace std;
// Integer matrix stored as a vector of rows: element (i, j) is m[i][j].
using vector2d = vector<vector<int>>;
// Adds the dot product of Ai and Bj to *Cij.
//
// Ai, Bj : the two operand vectors. Only the first min(Ai.size(), Bj.size())
//          element pairs are multiplied, so a length mismatch can never cause
//          an out-of-bounds read.
// Cij    : non-null pointer to the accumulator. The result is ADDED to the
//          value already stored there, so the caller must initialise it
//          (normally to 0) before calling.
//
// The function is usable as a std::thread entry point: it touches no shared
// state other than *Cij.
void Thread2F(const std::vector<int>& Ai, const std::vector<int>& Bj, int* Cij){
    const std::size_t n=std::min(Ai.size(), Bj.size());
    // Accumulate in a local and publish once, instead of writing through the
    // pointer on every iteration; neighbouring output cells are typically
    // being written by sibling threads at the same time.
    int sum=0;
    for(std::size_t k=0; k<n; ++k) sum+=Ai[k]*Bj[k];
    *Cij+=sum;
}
void Thread1F(vector<int> Ai, vector2d B, vector<int>* Ci){
int n=Ai.size();
vector<thread> threads;
for(int j=0; j<n; ++j){
vector<int> Bj(n);
for(int i=0; i<n; ++i){
Bj[i]=B[i][j];
}
(*Ci)[j]=0;
threads.push_back(thread(Thread2F, Ai, Bj, &(*Ci)[j]));
}
for(int j=0; j<n; ++j) threads[j].join();
}
// Multiplies A by B in parallel and returns the product.
//
// One thread per row of A runs Thread1F, which in turn computes each element
// of that row. The result has A.size() rows and A.size() columns.
//
// A : left-hand matrix (m x k).
// B : right-hand matrix; must be k x m (checked with assert, so the check is
//     compiled out when NDEBUG is defined). Square inputs of equal size are
//     the intended use. Rows are assumed to have uniform length; this is not
//     verified.
//
// Returns an empty matrix when A is empty.
//
// All row threads are joined before the function returns or propagates an
// exception (for example std::system_error if a thread cannot be created).
vector2d PSquareMatrixMultiply(const vector2d& A, const vector2d& B){
    // Guard before touching A[0] / B[0]: indexing an empty vector is undefined.
    if(A.empty()) return vector2d();
    assert(!B.empty() && A[0].size()==B.size() && A.size()==B[0].size());
    const std::size_t n=A.size();
    vector2d C(n, std::vector<int>(n, 0));
    std::vector<std::thread> threads;
    threads.reserve(n);
    try{
        for(std::size_t i=0; i<n; ++i){
            // A and B outlive the workers (joined below), so hand them over by
            // reference instead of copying the whole of B once per row.
            threads.emplace_back(Thread1F, std::cref(A[i]), std::cref(B), &C[i]);
        }
    }catch(...){
        // Workers hold references to A, B and C; wait for them before the
        // stack unwinds (and before joinable threads would be destroyed).
        for(std::thread& t : threads) t.join();
        throw;
    }
    for(std::thread& t : threads) t.join();
    return C;
}
// Demo driver: multiplies two fixed 9x9 matrices with PSquareMatrixMultiply
// and prints the product, one row per line with a space after every value,
// followed by one blank line.
int main(void){
    // Every row of A is 1..9 and every row of B is 9..1.
    const std::vector<int> ascending={1, 2, 3, 4, 5, 6, 7, 8, 9};
    const std::vector<int> descending={9, 8, 7, 6, 5, 4, 3, 2, 1};
    vector2d A(9, ascending);
    vector2d B(9, descending);

    vector2d C=PSquareMatrixMultiply(A, B);

    for(const std::vector<int>& row : C){
        for(int value : row) std::cout<<value<<' ';
        std::cout<<'\n';
    }
    // Trailing blank line; std::endl also flushes the stream.
    std::cout<<std::endl;
    return 0;
}